library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)


options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Load social distancing data

Load the Safegraph social distancing data

# bay_blockgroups <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_blockgroups.rds")

bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# obtaining weekends
weekends <- bay_sd %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(weekend = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), T, F)) %>% 
  dplyr::select(date,weekend)

bay_sd <- bay_sd %>% left_join(weekends)

# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()

# store an average of percent of devices completely at home since the shelter in place order started on weekdays
bay_sd_at_home_average <- bay_sd %>% 
  filter(weekend == F) %>% 
  filter(date > shelter_start) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% Not Completely at Home` = (100 - `% Completely at Home`))

# store average of percent of devices completely at home for January and February on weekdays
bay_pre_sd_at_home_average <- bay_sd %>% 
  filter(weekend == F) %>% 
  filter(date <  as.Date("2020-03-01")) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home Pre Shelter` = (completely_home_device_count/device_count*100) %>% round(1), `% Not Completely at Home Pre Shelter` = (100 - `% Completely at Home Pre Shelter`))

bay_sd_at_home_average <- bay_sd_at_home_average %>% left_join(bay_pre_sd_at_home_average %>% dplyr::select(origin_census_block_group, `% Completely at Home Pre Shelter`, `% Not Completely at Home Pre Shelter`))

I next obtain various demographic data and plot them against social distancing behavior, and examine for correlations.

# obtain the saved census data 
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")

# get FIPS codes for CA counties
bay_area_counties <- lapply(fips("CA", c("Alameda", "Contra Costa", "Marin", "Napa", "San Francisco", "San Mateo", "Santa Clara", "Solano", "Sonoma")), function(x) substr(x,3,5))

# define a function for pulling census data
pullCensus <- function(variableToPull, counties) {
  censusData <- NULL
  for (i in 1:length(counties)) {
    county <- counties[i]
    regionString <- paste0("state:06+county:", county)
    censusDataCounty <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
    censusData <- rbind(censusData, censusDataCounty)
  }
  
  return(censusData)
}

Income

# load in income data - code adapted from other students
bay_median_income_by_block <-
  pullCensus("B19013_001E", bay_area_counties) %>% 
  filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
  rename(
    Median_Income = B19013_001E 
  ) %>% 
  filter(!is.na(Median_Income)) %>% 
  left_join(bay_sd_at_home_average, by = c("blockgroup" = "origin_census_block_group")) %>% 
  filter(!is.na(device_count)) 

bay_ami_by_block <-
  pullCensus("group(B19001)", bay_area_counties) %>%
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
  group_by(blockgroup) %>% 
  summarize(
    Total = B19001_001E,
    `Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
    #sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
    `Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E),
    `Under 125,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E),
    `Under 150,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E, B19001_015E)
  ) %>% 
  mutate(
    `% under 75,000` = `Under 75,000` / Total * 100,
    `% over 75,000` = (100 - `% under 75,000`),
    `% under 100,000` = `Under 100,000` / Total * 100,
    `% over 100,000` = (100 - `% under 100,000`),
    `% under 125,000` = `Under 125,000` / Total * 100,
    `% over 125,000` = (100 - `% under 125,000`),
    `% under 150,000` = `Under 150,000` / Total * 100,
    `% over 150,000` = (100 - `% under 150,000`),
  ) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)
  ) %>% 
  filter(!is.na(device_count))

# plotting
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $75,000"
  )

income_75_model <- lm(`% Not Completely at Home` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 75,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.950  -5.481  -0.589   5.004  42.475 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     62.80723    0.40400  155.46   <2e-16 ***
## `% over 75,000` -0.15283    0.00635  -24.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.439 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1091, Adjusted R-squared:  0.1089 
## F-statistic: 579.2 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $100,000"
  )

income_100_model <- lm(`% Not Completely at Home` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 100,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.841  -5.502  -0.610   4.829  44.423 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      61.223085   0.312190  196.11   <2e-16 ***
## `% over 100,000` -0.156457   0.005862  -26.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.335 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.131,  Adjusted R-squared:  0.1308 
## F-statistic: 712.5 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $125,000"
  )

income_125_model <- lm(`% Not Completely at Home` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 125,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.857  -5.389  -0.552   4.719  46.493 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      60.027700   0.259526   231.3   <2e-16 ***
## `% over 125,000` -0.165202   0.005859   -28.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.272 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1439, Adjusted R-squared:  0.1438 
## F-statistic:   795 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 150,000`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $150,000 annually",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $150,000"
  )

income_150_model <- lm(`% Not Completely at Home` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 150,000`, data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.271  -5.437  -0.574   4.741  44.959 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      58.891344   0.226982   259.5   <2e-16 ***
## `% over 150,000` -0.171093   0.006154   -27.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.289 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1405, Adjusted R-squared:  0.1403 
## F-statistic:   773 on 1 and 4728 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Households Above $75,000 Pre Shelter-in-Place"
  )

income_75_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 75,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.095  -2.942   0.245   3.290  20.435 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     70.965396   0.239540  296.26   <2e-16 ***
## `% over 75,000`  0.113036   0.003765   30.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.003 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1601, Adjusted R-squared:  0.1599 
## F-statistic: 901.3 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Households Above $100,000 Pre Shelter-in-Place"
  )

income_100_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 100,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.2334  -2.9109   0.3057   3.3078  18.9262 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      72.473764   0.186304  389.01   <2e-16 ***
## `% over 100,000`  0.108863   0.003498   31.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.974 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:   0.17,  Adjusted R-squared:  0.1699 
## F-statistic: 968.6 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $125,000 Pre Shelter-in-Place"
  )

income_125_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 125,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.3512  -2.8027   0.3572   3.2505  17.9572 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      73.442753   0.155512  472.26   <2e-16 ***
## `% over 125,000`  0.111451   0.003511   31.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.957 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1757, Adjusted R-squared:  0.1755 
## F-statistic:  1008 on 1 and 4728 DF,  p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>% 
  ggplot(aes(
  x = `% over 150,000`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $150,000 annually",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Households Above $150,000 Pre Shelter-in-Place"
  )

income_150_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 150,000`, 
##     data = bay_ami_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.373  -2.878   0.360   3.302  17.222 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      74.178083   0.135838  546.08   <2e-16 ***
## `% over 150,000`  0.116426   0.003683   31.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.96 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1745, Adjusted R-squared:  0.1743 
## F-statistic: 999.5 on 1 and 4728 DF,  p-value: < 2.2e-16

Language

# loading in language data - code adapted from other students
bay_lang_by_block <-
  pullCensus("group(B16004)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  left_join(acs_vars, by = c("variable" = "name")) %>% 
  mutate(
    tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
  ) %>% 
  filter(tier %in% c('Speak English "not well"', 
                     'Speak English "not at all"', 
                     'Total', 'Speak Spanish', 
                     'Speak Asian and Pacific Island languages')) %>% 
  group_by(blockgroup, tier) %>% 
  summarise(
    estimate1 = sum(estimate)
  ) %>% 
  spread(
    key = "tier",
    value = "estimate1"
  ) %>% 
  mutate(
    `% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
    `% speaking english > well` = (100 - `% speaking english < well`),
    `% speaking spanish` = (`Speak Spanish`/ Total) * 100,
    `% not speaking spanish` = (100 - `% speaking spanish`),
    `% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
  ) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count)) %>% 
  mutate(log_perc = log(`% speaking english < well`))

# plotting
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and English Language Ability"
  )

english_ability_model <- lm(`% Not Completely at Home` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% speaking english > well`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.050  -5.829  -0.300   5.478  40.573 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 54.24165    1.42632  38.029   <2e-16 ***
## `% speaking english > well` -0.00720    0.01538  -0.468     0.64    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.989 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  4.626e-05,  Adjusted R-squared:  -0.000165 
## F-statistic: 0.219 on 1 and 4734 DF,  p-value: 0.6398
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model <- lm(`% Not Completely at Home` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% not speaking spanish`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.789  -5.682  -0.491   5.102  41.431 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              63.688477   0.622427  102.32   <2e-16 ***
## `% not speaking spanish` -0.120385   0.007255  -16.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.739 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.05497,    Adjusted R-squared:  0.05477 
## F-statistic: 275.4 on 1 and 4734 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_lang_by_block %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and English Language Ability Pre Shelter-in-Place"
  )

english_ability_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% speaking english > well`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.866  -3.136   0.362   3.676  14.914 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 61.829284   0.837774   73.80   <2e-16 ***
## `% speaking english > well`  0.173148   0.009037   19.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.279 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.07199,    Adjusted R-squared:  0.0718 
## F-statistic: 367.1 on 1 and 4732 DF,  p-value: < 2.2e-16
bay_lang_by_block %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
  )

spanish_speaking_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`, 
##     data = bay_lang_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.848  -3.159   0.413   3.603  13.869 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              70.818907   0.376268  188.21   <2e-16 ***
## `% not speaking spanish`  0.083275   0.004386   18.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.283 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.0708, Adjusted R-squared:  0.07061 
## F-statistic: 360.6 on 1 and 4732 DF,  p-value: < 2.2e-16

Age

# loading in age data - specifically looking at percentage 65+ and percentage <30
bay_age_by_block <- 
  pullCensus("group(B01001)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  mutate(
    label = acs_vars$label[match(variable,acs_vars$name)]
  ) %>% 
  select(-variable) %>% 
  separate(
    label,
    into = c(NA,NA,"sex","age"),
    sep = "!!"
  ) %>% filter(!is.na(age)) %>% 
  mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA), `less than 18` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years"), estimate, NA), `20-29` = ifelse(age %in% c("20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>% 
  group_by(blockgroup) %>% 
  summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T), `less than 18` = sum(`less than 18`, na.rm = T), `20-29` = sum(`20-29`, na.rm = T)) %>% 
  mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total, `percent nonelderly` = (100 - `percent elderly`), `percent less than 18` = `less than 18`*100/total, `percent 20-29` = `20-29`*100/total) %>% 
  left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>% 
  filter(!is.na(device_count)) 

# plotting
bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Young Age Groups"
  )

young_model <- lm(bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
summary(young_model)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.612  -5.685  -0.324   5.298  37.581 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             49.21355    0.45666 107.769   <2e-16
## bay_age_by_block$`percent less than 30`  0.12308    0.01235   9.963   <2e-16
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.897 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.02054,    Adjusted R-squared:  0.02033 
## F-statistic: 99.26 on 1 and 4734 DF,  p-value: < 2.2e-16
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Elderly Population"
  )

elderly_model <- lm(`% Not Completely at Home` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent elderly`, 
##     data = bay_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.740  -5.773  -0.321   5.499  39.732 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       54.40160    0.27777 195.853  < 2e-16 ***
## `percent elderly` -0.05656    0.01604  -3.527 0.000425 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.957 on 4693 degrees of freedom
## Multiple R-squared:  0.002643,   Adjusted R-squared:  0.002431 
## F-statistic: 12.44 on 1 and 4693 DF,  p-value: 0.0004248
bay_age_by_block %>%
  ggplot(aes(
  x = `percent 20-29`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents ages 20-29",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Young Adults"
  )

young_adult_model <- lm(bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent 20-29`)
summary(young_adult_model)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.331  -5.759  -0.354   5.323  41.888 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      50.75100    0.24379  208.17   <2e-16 ***
## bay_age_by_block$`percent 20-29`  0.21104    0.01549   13.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.818 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.03774,    Adjusted R-squared:  0.03753 
## F-statistic: 185.7 on 1 and 4734 DF,  p-value: < 2.2e-16
bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 18`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 18",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Children"
  )

child_model <- lm(bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 18`)
summary(child_model)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.559  -5.915  -0.364   5.577  44.592 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             56.19993    0.34997  160.59  < 2e-16
## bay_age_by_block$`percent less than 18` -0.13057    0.01618   -8.07 8.85e-16
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 18` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.928 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.01357,    Adjusted R-squared:  0.01336 
## F-statistic: 65.12 on 1 and 4734 DF,  p-value: 8.847e-16

Compare to pre-shelter-in-place behavior:

bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Young Age Groups Pre Shelter-in-Place"
  )

young_model2 <- lm(bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ bay_age_by_block$`percent less than 30`)
summary(young_model2)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ 
##     bay_age_by_block$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.649  -3.332   0.283   3.749  15.296 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             78.773529   0.281877 279.460  < 2e-16
## bay_age_by_block$`percent less than 30` -0.027096   0.007631  -3.551 0.000388
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.473 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.002657,   Adjusted R-squared:  0.002447 
## F-statistic: 12.61 on 1 and 4732 DF,  p-value: 0.0003878
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Elderly Population Pre Shelter-in-Place"
  )

elderly_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent elderly`, 
##     data = bay_age_by_block %>% filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.714  -3.316   0.331   3.681  14.810 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       76.546794   0.167597 456.730   <2e-16 ***
## `percent elderly`  0.085606   0.009675   8.848   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.399 on 4691 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01642,    Adjusted R-squared:  0.01621 
## F-statistic: 78.29 on 1 and 4691 DF,  p-value: < 2.2e-16
bay_age_by_block %>%
  ggplot(aes(
  x = `percent 20-29`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents 20-29",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Young Adults Pre Shelter-in-Place"
  )

young_adult_model2 <- lm(bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ bay_age_by_block$`percent 20-29`)
summary(young_adult_model2)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ 
##     bay_age_by_block$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.861  -3.283   0.271   3.622  19.281 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      79.612651   0.149249  533.42   <2e-16 ***
## bay_age_by_block$`percent 20-29` -0.134571   0.009516  -14.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.368 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.04055,    Adjusted R-squared:  0.04035 
## F-statistic:   200 on 1 and 4732 DF,  p-value: < 2.2e-16
bay_age_by_block %>%
  ggplot(aes(
  x = `percent less than 18`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 18",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Children Pre Shelter-in-Place"
  )

child_model2 <- lm(bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ bay_age_by_block$`percent less than 18`)
summary(child_model2)
## 
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ 
##     bay_age_by_block$`percent less than 18`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.5690  -3.3708   0.3005   3.7710  15.4110 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             76.269015   0.213768 356.783  < 2e-16
## bay_age_by_block$`percent less than 18`  0.076842   0.009881   7.777 9.06e-15
##                                            
## (Intercept)                             ***
## bay_age_by_block$`percent less than 18` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.446 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.01262,    Adjusted R-squared:  0.01241 
## F-statistic: 60.48 on 1 and 4732 DF,  p-value: 9.058e-15

Vehicles available

# also get data on vehicles available as households without a vehicle
bay_no_vehicles_by_block <- pullCensus("group(B25044)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>%
  separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>% 
  filter(!is.na(vehicles)) %>%
  group_by(blockgroup, vehicles) %>%
  summarize(grouped_vehicles = sum(estimate)) %>%
  spread(key = vehicles, value = grouped_vehicles) %>%
  mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `percent with vehicles` = (100-`percent no vehicles`)) %>%
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count))

# plotting
bay_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Vehicle Availability"
  )

vehicles_model <- lm(`% Not Completely at Home` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent with vehicles`, 
##     data = bay_no_vehicles_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.719  -5.880  -0.278   5.498  40.680 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             57.07017    0.95913  59.502  < 2e-16 ***
## `percent with vehicles` -0.03851    0.01038  -3.711 0.000209 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.928 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.002904,   Adjusted R-squared:  0.002693 
## F-statistic: 13.77 on 1 and 4728 DF,  p-value: 0.0002088

Compare to pre-shelter-in-place behavior:

bay_no_vehicles_by_block %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Vehicle Availability Pre Shelter-in-Place"
  )

vehicles_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent with vehicles`, 
##     data = bay_no_vehicles_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.3199  -3.2767   0.2145   3.5354  22.1954 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             66.925407   0.564331  118.59   <2e-16 ***
## `percent with vehicles`  0.118945   0.006106   19.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.253 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.07429,    Adjusted R-squared:  0.0741 
## F-statistic: 379.5 on 1 and 4728 DF,  p-value: < 2.2e-16

Occupants per room

# get data on occupants per room
bay_occupants_per_room_by_block <- pullCensus("group(B25014)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums, `percent less than 1` = (100-`percent 1 or more`)) %>%
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Room Occupancy"
  )

occupants_model <- lm(`% Not Completely at Home` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent less than 1`, 
##     data = bay_occupants_per_room_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.441  -5.713  -0.341   5.429  40.742 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           61.92498    1.41735  43.691  < 2e-16 ***
## `percent less than 1` -0.08984    0.01513  -5.938 3.09e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.908 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.007403,   Adjusted R-squared:  0.007193 
## F-statistic: 35.26 on 1 and 4728 DF,  p-value: 3.087e-09

Compare to pre-shelter-in-place behavior:

bay_occupants_per_room_by_block %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Room Occupancy Pre Shelter-in-Place"
  )

occupants_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent less than 1`, 
##     data = bay_occupants_per_room_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.361  -3.161   0.316   3.671  17.116 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           63.306072   0.842479   75.14   <2e-16 ***
## `percent less than 1`  0.155550   0.008993   17.30   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.295 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.05951,    Adjusted R-squared:  0.05931 
## F-statistic: 299.2 on 1 and 4728 DF,  p-value: < 2.2e-16

Education

bay_education_by_block <- pullCensus("group(B15003)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "education level"), sep = "!!") %>% 
  mutate(`education level` = replace_na(`education level`, "total_educ")) %>% # if the education level field is NA, this corresponded to the total number in that blockgroup
  spread(key = `education level`, value = estimate) %>%
  mutate(`percent associates or higher` = (`Associate's degree` + `Bachelor's degree` + `Doctorate degree` + `Master's degree`)*100/total_educ, `percent less than associates` = 100-`percent associates or higher`) %>% 
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_education_by_block %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Education"
  )

educ_model <- lm(`% Not Completely at Home` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent associates or higher`, 
##     data = bay_education_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.184  -5.530  -0.760   4.744  43.548 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    60.723373   0.334942  181.29   <2e-16 ***
## `percent associates or higher` -0.141764   0.006168  -22.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.514 on 4733 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1004, Adjusted R-squared:  0.1002 
## F-statistic: 528.2 on 1 and 4733 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_education_by_block %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and Education Pre Shelter-in-Place"
  )

educ_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent associates or higher`, 
##     data = bay_education_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.2636  -3.0081   0.4311   3.4893  15.3499 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    73.135228   0.202927  360.40   <2e-16 ***
## `percent associates or higher`  0.092690   0.003737   24.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.155 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.1149 
## F-statistic: 615.3 on 1 and 4732 DF,  p-value: < 2.2e-16

High speed internet access

Motivated by this paper https://www.nber.org/papers/w26982.pdf on social distancing, internet access, and inequality, we look at whether a household has “Broadband (high-speed) Internet service such as cable, fiber optic, or DSL service,” and staying at home.

bay_internet_by_block <- pullCensus("group(B28002)", bay_area_counties) %>%
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>% 
  filter(is.na(subscription) | (type == "Broadband such as cable, fiber optic or DSL") & is.na(additional)) %>% 
  mutate(type = replace_na(type, "total_num")) %>% 
  dplyr::select(blockgroup, type, estimate) %>%
  spread(key = type, value = estimate) %>%
  mutate(`percent high speed` = `Broadband such as cable, fiber optic or DSL`*100/total_num, `percent no high speed` = 100-`percent high speed`) %>% 
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# plotting
bay_internet_by_block %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with broadband such as cable, fiber optic or DSL",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and High Speed Internet"
  )

internet_model <- lm(`% Not Completely at Home` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent high speed`, 
##     data = bay_internet_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.152  -5.569  -0.436   5.004  43.707 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          69.278040   0.737701   93.91   <2e-16 ***
## `percent high speed` -0.198061   0.009154  -21.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.528 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.0901, Adjusted R-squared:  0.0899 
## F-statistic: 468.2 on 1 and 4728 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_internet_by_block %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households without broadband such as cable, fiber optic or DSL",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Staying at Home and High Speed Internet Pre Shelter-in-Place"
  )

internet_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent high speed`, 
##     data = bay_internet_by_block)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.3893  -3.1107   0.1585   3.5344  20.8729 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          67.53971    0.44725  151.01   <2e-16 ***
## `percent high speed`  0.12937    0.00555   23.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.171 on 4728 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1031, Adjusted R-squared:  0.1029 
## F-statistic: 543.4 on 1 and 4728 DF,  p-value: < 2.2e-16

Race

bay_race_by_block <- pullCensus("group(B02001)", bay_area_counties) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "race", "specification"), sep = "!!") %>% 
  filter(is.na(specification) & !is.na(race)) %>% 
  dplyr::select(blockgroup, estimate, race) %>%
  spread(key = race, value = estimate) %>% 
  mutate(total_race = `American Indian and Alaska Native alone` + `Asian alone` + `Black or African American alone` + `Native Hawaiian and Other Pacific Islander alone` + `Some other race alone` + `Two or more races` + `White alone`, `% white` = `White alone`*100/total_race, `% Asian` = `Asian alone`*100/total_race, `% black` = `Black or African American alone`*100/total_race) %>%  
  left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
  filter(!is.na(device_count)) 

# also get ethnicity data (hispanic/latino vs not)
bay_hisplat_by_block <- pullCensus("group(B03002)", bay_area_counties) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  select(-variable) %>% 
  separate(label, into = c(NA, NA, "hisp/lat", "specification"), sep = "!!") %>%
  filter(is.na(specification) & !is.na(`hisp/lat`)) %>% 
  dplyr::select(blockgroup, estimate, `hisp/lat`) %>% 
  spread(key = `hisp/lat`, value = estimate) %>%
  mutate(`% non hispanic/latino` = `Not Hispanic or Latino`*100/(`Hispanic or Latino` + `Not Hispanic or Latino`))

# join with the race data
bay_race_by_block <- bay_race_by_block %>% left_join(bay_hisplat_by_block %>% dplyr::select(blockgroup, `% non hispanic/latino`))

# plotting
# percent white
bay_race_by_block %>% 
  ggplot(aes(
  x = `% white`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and White Residents"
  )

white_model <- lm(`% Not Completely at Home` ~ `% white`, bay_race_by_block)
summary(white_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% white`, data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.756  -5.808  -0.274   5.405  40.487 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 49.126499   0.314033  156.44   <2e-16 ***
## `% white`    0.082606   0.005327   15.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.769 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.04833,    Adjusted R-squared:  0.04813 
## F-statistic: 240.4 on 1 and 4734 DF,  p-value: < 2.2e-16
# percent Asian
bay_race_by_block %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Asian Residents"
  )

asian_model <- lm(`% Not Completely at Home` ~ `% Asian`, bay_race_by_block)
summary(asian_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% Asian`, data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.733  -5.235  -0.583   4.657  42.945 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 58.193029   0.176671  329.39   <2e-16 ***
## `% Asian`   -0.195125   0.005611  -34.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.023 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2035, Adjusted R-squared:  0.2033 
## F-statistic:  1209 on 1 and 4734 DF,  p-value: < 2.2e-16
# percent non hispanic/latino
bay_race_by_block %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% Not Completely at Home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Percent devices leaving home on weekdays since shelter-in-place",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
  )

hisp_model <- lm(`% Not Completely at Home` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model)
## 
## Call:
## lm(formula = `% Not Completely at Home` ~ `% non hispanic/latino`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.817  -5.637  -0.576   4.950  41.449 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             62.827282   0.517102  121.50   <2e-16 ***
## `% non hispanic/latino` -0.119133   0.006458  -18.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.683 on 4734 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.06706,    Adjusted R-squared:  0.06686 
## F-statistic: 340.3 on 1 and 4734 DF,  p-value: < 2.2e-16

Compare to pre-shelter-in-place behavior:

bay_race_by_block %>% 
  ggplot(aes(
  x = `% white`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and White Residents Pre Shelter-in-Place"
  )

white_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% white`, bay_race_by_block)
summary(white_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% white`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.636  -3.187   0.365   3.610  14.301 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 74.825617   0.190483  392.82   <2e-16 ***
## `% white`    0.055451   0.003231   17.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.317 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.05859,    Adjusted R-squared:  0.05839 
## F-statistic: 294.5 on 1 and 4732 DF,  p-value: < 2.2e-16
bay_race_by_block %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Asian Residents Pre Shelter-in-Place"
  )

asian_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% Asian`, bay_race_by_block)
summary(asian_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% Asian`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.937  -3.425   0.249   3.755  14.494 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 77.600043   0.120661 643.127   <2e-16 ***
## `% Asian`    0.009013   0.003832   2.352   0.0187 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.477 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.001168,   Adjusted R-squared:  0.0009568 
## F-statistic: 5.533 on 1 and 4732 DF,  p-value: 0.0187
bay_race_by_block %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% Not Completely at Home Pre Shelter`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Percent devices leaving home on weekdays pre-shelter-in-place",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents Pre Shelter-in-Place"
  )

hisp_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model2)
## 
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`, 
##     data = bay_race_by_block)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.022  -3.189   0.459   3.605  17.674 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             72.007971   0.314583  228.90   <2e-16 ***
## `% non hispanic/latino`  0.074764   0.003929   19.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.282 on 4732 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.07109,    Adjusted R-squared:  0.07089 
## F-statistic: 362.1 on 1 and 4732 DF,  p-value: < 2.2e-16

Multiple regression analysis

Multiple regression analysis with income, education, and internet

# multiple regression 
modeltest <- lm(bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` + bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
summary(modeltest)
## 
## Call:
## lm(formula = bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` + 
##     bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.480  -5.428  -0.583   4.630  44.911 
## 
## Coefficients:
##                                                        Estimate Std. Error
## (Intercept)                                           64.648674   0.756391
## bay_ami_by_block$`% over 125,000`                     -0.120948   0.008953
## bay_education_by_block$`percent associates or higher` -0.026757   0.008859
## bay_internet_by_block$`percent high speed`            -0.063025   0.011528
##                                                       t value Pr(>|t|)    
## (Intercept)                                            85.470  < 2e-16 ***
## bay_ami_by_block$`% over 125,000`                     -13.510  < 2e-16 ***
## bay_education_by_block$`percent associates or higher`  -3.020  0.00254 ** 
## bay_internet_by_block$`percent high speed`             -5.467 4.81e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.23 on 4726 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1529, Adjusted R-squared:  0.1524 
## F-statistic: 284.4 on 3 and 4726 DF,  p-value: < 2.2e-16

Changes in staying at home behavior

bay_dem_distancing <- bay_internet_by_block %>% 
  dplyr::select(`percent high speed`, `% Not Completely at Home`, `% Completely at Home`, blockgroup) %>% 
  left_join(bay_education_by_block %>% dplyr::select(blockgroup, `percent associates or higher`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 125,000`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 100,000`)) %>% 
  left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 75,000`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent elderly`)) %>% 
  left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% not speaking spanish`)) %>% 
  left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% speaking english > well`)) %>% 
  left_join(bay_no_vehicles_by_block %>% dplyr::select(blockgroup, `percent with vehicles`)) %>%
  left_join(bay_occupants_per_room_by_block %>% dplyr::select(blockgroup, `percent less than 1`)) %>% 
  left_join(bay_race_by_block %>% dplyr::select(blockgroup, `% white`, `% Asian`, `% non hispanic/latino`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent less than 18`)) %>% 
  left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent 20-29`))

bay_dem_distancing_pre_post <- bay_dem_distancing %>% 
  left_join(bay_internet_by_block %>% dplyr::select(`% Not Completely at Home Pre Shelter`, `% Completely at Home Pre Shelter`, blockgroup)) %>% 
  mutate(`% increase in staying completely home` = `% Completely at Home` - `% Completely at Home Pre Shelter`, frac_increase = `% increase in staying completely home`/`% Completely at Home Pre Shelter`)

bay_dem_distancing[is.na(bay_dem_distancing)] <- 0
bay_dem_distancing_pre_post[is.na(bay_dem_distancing_pre_post)] <- 0

saveRDS(bay_dem_distancing_pre_post, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")

# bay_dem_distancing_pre_post <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")

Age

# age
bay_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent less than 30`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 30",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Young Age Groups"
  )

young_model_dif <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_dif)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.943  -6.424   0.005   6.855  33.055 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        28.92319    0.54029  53.533
## bay_dem_distancing_pre_post$`percent less than 30` -0.13344    0.01463  -9.123
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.62 on 4741 degrees of freedom
## Multiple R-squared:  0.01725,    Adjusted R-squared:  0.01704 
## F-statistic: 83.22 on 1 and 4741 DF,  p-value: < 2.2e-16
young_model_frac <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_frac)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2657 -0.4941 -0.1215  0.3793  3.9988 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                         1.588540   0.037599   42.25
## bay_dem_distancing_pre_post$`percent less than 30` -0.010203   0.001018  -10.02
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7392 on 4741 degrees of freedom
## Multiple R-squared:  0.02075,    Adjusted R-squared:  0.02054 
## F-statistic: 100.5 on 1 and 4741 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
  ggplot(aes(
  x = `percent elderly`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents 65 and older",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Elderly Population"
  )

elderly_model_dif <- lm(`% increase in staying completely home` ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent elderly`, 
##     data = bay_dem_distancing_pre_post %>% filter(`percent elderly` < 
##         50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.069  -6.626  -0.041   6.961  33.418 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       21.99372    0.32828  66.996  < 2e-16 ***
## `percent elderly`  0.14989    0.01897   7.902  3.4e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.62 on 4700 degrees of freedom
## Multiple R-squared:  0.01311,    Adjusted R-squared:  0.0129 
## F-statistic: 62.44 on 1 and 4700 DF,  p-value: 3.401e-15
elderly_model_frac <- lm(frac_increase ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent elderly`, data = bay_dem_distancing_pre_post %>% 
##     filter(`percent elderly` < 50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0499 -0.5002 -0.1194  0.3741  3.9793 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.006917   0.022780   44.20   <2e-16 ***
## `percent elderly` 0.014802   0.001316   11.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.737 on 4700 degrees of freedom
## Multiple R-squared:  0.0262, Adjusted R-squared:  0.02599 
## F-statistic: 126.4 on 1 and 4700 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent less than 18`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents younger than 18",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Child Population"
  )

child_model_dif <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_dif)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.351  -6.872   0.183   7.259  31.462 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        19.85106    0.41220   48.16
## bay_dem_distancing_pre_post$`percent less than 18`  0.21677    0.01907   11.37
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.57 on 4741 degrees of freedom
## Multiple R-squared:  0.02653,    Adjusted R-squared:  0.02632 
## F-statistic: 129.2 on 1 and 4741 DF,  p-value: < 2.2e-16
child_model_frac <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_frac)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5310 -0.5202 -0.0999  0.3960  3.9959 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                        0.895641   0.028659   31.25
## bay_dem_distancing_pre_post$`percent less than 18` 0.016538   0.001326   12.47
##                                                    Pr(>|t|)    
## (Intercept)                                          <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7351 on 4741 degrees of freedom
## Multiple R-squared:  0.03177,    Adjusted R-squared:  0.03156 
## F-statistic: 155.6 on 1 and 4741 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>%
  ggplot(aes(
  x = `percent 20-29`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
labs(
    x = "Percent of residents ages 20-29",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Young Adult Residents"
  )

young_adult_model_dif <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_dif)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.305  -6.402   0.193   6.865  38.925 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                 28.63144    0.28559  100.25
## bay_dem_distancing_pre_post$`percent 20-29` -0.33146    0.01816  -18.25
##                                             Pr(>|t|)    
## (Intercept)                                   <2e-16 ***
## bay_dem_distancing_pre_post$`percent 20-29`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.36 on 4741 degrees of freedom
## Multiple R-squared:  0.06568,    Adjusted R-squared:  0.06548 
## F-statistic: 333.3 on 1 and 4741 DF,  p-value: < 2.2e-16
young_adult_model_frac <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_frac)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent 20-29`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2742 -0.4762 -0.0915  0.3800  3.9302 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  1.596986   0.019606   81.45
## bay_dem_distancing_pre_post$`percent 20-29` -0.027642   0.001246  -22.18
##                                             Pr(>|t|)    
## (Intercept)                                   <2e-16 ***
## bay_dem_distancing_pre_post$`percent 20-29`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7111 on 4741 degrees of freedom
## Multiple R-squared:  0.09398,    Adjusted R-squared:  0.09379 
## F-statistic: 491.8 on 1 and 4741 DF,  p-value: < 2.2e-16

Income

# income - less than $75000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 75,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Above 50% AMI"
  )

income_75_model_dif <- lm(`% increase in staying completely home` ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 75,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.107  -5.198   0.477   5.941  33.250 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.841106   0.439548   17.84   <2e-16 ***
## `% over 75,000` 0.270610   0.006918   39.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.317 on 4741 degrees of freedom
## Multiple R-squared:  0.244,  Adjusted R-squared:  0.2438 
## F-statistic:  1530 on 1 and 4741 DF,  p-value: < 2.2e-16
income_75_model_frac <- lm(frac_increase ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 75,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2214 -0.3916 -0.0535  0.3252  3.7891 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0372825  0.0301996   1.235    0.217    
## `% over 75,000` 0.0196873  0.0004753  41.418   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6401 on 4741 degrees of freedom
## Multiple R-squared:  0.2657, Adjusted R-squared:  0.2655 
## F-statistic:  1715 on 1 and 4741 DF,  p-value: < 2.2e-16
# income - less than $100000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 100,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Below 80% AMI"
  )

income_100_model_dif <- lm(`% increase in staying completely home` ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 100,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.505  -4.953   0.532   5.880  30.128 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      10.999003   0.338838   32.46   <2e-16 ***
## `% over 100,000`  0.269674   0.006371   42.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.128 on 4741 degrees of freedom
## Multiple R-squared:  0.2743, Adjusted R-squared:  0.2741 
## F-statistic:  1792 on 1 and 4741 DF,  p-value: < 2.2e-16
income_100_model_frac <- lm(frac_increase ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 100,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3238 -0.3689 -0.0328  0.3185  3.6991 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.2566983  0.0231141   11.11   <2e-16 ***
## `% over 100,000` 0.0198302  0.0004346   45.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6227 on 4741 degrees of freedom
## Multiple R-squared:  0.3052, Adjusted R-squared:  0.305 
## F-statistic:  2082 on 1 and 4741 DF,  p-value: < 2.2e-16
# income - less than $125000
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% over 125,000`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) +
  labs(
    x = "Percent of housholds with incomes over $125,000 annually",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Households Below $125,000"
  )

income_125_model_dif <- lm(`% increase in staying completely home` ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 125,000`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.353  -4.707   0.662   5.859  27.495 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      13.210195   0.281170   46.98   <2e-16 ***
## `% over 125,000`  0.280752   0.006357   44.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.019 on 4741 degrees of freedom
## Multiple R-squared:  0.2915, Adjusted R-squared:  0.2914 
## F-statistic:  1951 on 1 and 4741 DF,  p-value: < 2.2e-16
income_125_model_frac <- lm(frac_increase ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% over 125,000`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5463 -0.3529 -0.0299  0.3202  3.6688 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.4096837  0.0190321   21.53   <2e-16 ***
## `% over 125,000` 0.0208903  0.0004303   48.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6105 on 4741 degrees of freedom
## Multiple R-squared:  0.3321, Adjusted R-squared:  0.332 
## F-statistic:  2357 on 1 and 4741 DF,  p-value: < 2.2e-16

Language

# language
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% speaking english > well`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals speaking English well",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and English Language Ability"
  )

english_ability_model_dif <- lm(`% increase in staying completely home` ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% speaking english > well`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.924  -6.518   0.135   6.957  32.920 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  6.00262    1.54502   3.885 0.000104 ***
## `% speaking english > well`  0.19740    0.01668  11.836  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.56 on 4741 degrees of freedom
## Multiple R-squared:  0.0287, Adjusted R-squared:  0.0285 
## F-statistic: 140.1 on 1 and 4741 DF,  p-value: < 2.2e-16
english_ability_model_frac <- lm(frac_increase ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% speaking english > well`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1540 -0.4823 -0.0964  0.3732  3.9587 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -0.567857   0.106103  -5.352 9.11e-08 ***
## `% speaking english > well`  0.019475   0.001145  17.004  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7252 on 4741 degrees of freedom
## Multiple R-squared:  0.05748,    Adjusted R-squared:  0.05728 
## F-statistic: 289.1 on 1 and 4741 DF,  p-value: < 2.2e-16
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% not speaking spanish`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of individuals not speaking Spanish",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Spanish Language Ability"
  )

spanish_speaking_model_dif <- lm(`% increase in staying completely home` ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% not speaking spanish`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.486  -5.815   0.671   6.575  29.580 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              6.786948   0.704050    9.64   <2e-16 ***
## `% not speaking spanish` 0.207614   0.008212   25.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.06 on 4741 degrees of freedom
## Multiple R-squared:  0.1188, Adjusted R-squared:  0.1186 
## F-statistic: 639.2 on 1 and 4741 DF,  p-value: < 2.2e-16
spanish_speaking_model_frac <- lm(frac_increase ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% not speaking spanish`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1334 -0.4446 -0.0571  0.3544  3.8584 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.0664614  0.0486305  -1.367    0.172    
## `% not speaking spanish`  0.0154268  0.0005672  27.197   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6948 on 4741 degrees of freedom
## Multiple R-squared:  0.135,  Adjusted R-squared:  0.1348 
## F-statistic: 739.7 on 1 and 4741 DF,  p-value: < 2.2e-16

Occupants per room

# occupants per room
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent less than 1`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with 1 or fewer occupant per room",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Room Occupancy"
  )

occupants_model_dif <- lm(`% increase in staying completely home` ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent less than 1`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.986  -6.394   0.249   6.793  33.192 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.34571    1.43730   0.241     0.81    
## `percent less than 1`  0.25640    0.01536  16.689   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.41 on 4741 degrees of freedom
## Multiple R-squared:  0.05549,    Adjusted R-squared:  0.05529 
## F-statistic: 278.5 on 1 and 4741 DF,  p-value: < 2.2e-16
occupants_model_frac <- lm(frac_increase ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent less than 1`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0903 -0.4844 -0.0931  0.3765  3.8806 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.70208    0.09918  -7.079 1.66e-12 ***
## `percent less than 1`  0.02074    0.00106  19.563  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7186 on 4741 degrees of freedom
## Multiple R-squared:  0.0747, Adjusted R-squared:  0.0745 
## F-statistic: 382.7 on 1 and 4741 DF,  p-value: < 2.2e-16

Vehicle ownership

# vehicles - percent with no vehicles
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent with vehicles`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of housholds with vehicles available",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Vehicle Availability"
  )

vehicles_model_dif <- lm(`% increase in staying completely home` ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent with vehicles`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.720  -6.620   0.012   7.080  31.380 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8.20415    1.04747   7.832 5.87e-15 ***
## `percent with vehicles`  0.17516    0.01135  15.433  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.46 on 4741 degrees of freedom
## Multiple R-squared:  0.04784,    Adjusted R-squared:  0.04764 
## F-statistic: 238.2 on 1 and 4741 DF,  p-value: < 2.2e-16
vehicles_model_frac <- lm(frac_increase ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent with vehicles`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0690 -0.4971 -0.1096  0.3759  3.9661 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.0685642  0.0723782  -0.947    0.344    
## `percent with vehicles`  0.0141914  0.0007842  18.096   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7225 on 4741 degrees of freedom
## Multiple R-squared:  0.06461,    Adjusted R-squared:  0.06441 
## F-statistic: 327.5 on 1 and 4741 DF,  p-value: < 2.2e-16

Education

bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent associates or higher`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of people with an degree at Associate's level or higher",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Education"
  )

educ_model_dif <- lm(`% increase in staying completely home` ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent associates or higher`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.217  -5.112   0.902   6.228  26.234 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    12.240229   0.374878   32.65   <2e-16 ***
## `percent associates or higher`  0.237396   0.006909   34.36   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.588 on 4741 degrees of freedom
## Multiple R-squared:  0.1994, Adjusted R-squared:  0.1992 
## F-statistic:  1181 on 1 and 4741 DF,  p-value: < 2.2e-16
educ_model_frac <- lm(frac_increase ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent associates or higher`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3224 -0.3783 -0.0346  0.3409  3.5256 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.3298148  0.0256120   12.88   <2e-16 ***
## `percent associates or higher` 0.0178170  0.0004721   37.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6551 on 4741 degrees of freedom
## Multiple R-squared:  0.2311, Adjusted R-squared:  0.2309 
## F-statistic:  1425 on 1 and 4741 DF,  p-value: < 2.2e-16

Internet

bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `percent high speed`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of households with broadband such as cable, fiber optic or DSL",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and High Speed Internet"
  )

internet_model_dif <- lm(`% increase in staying completely home` ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent high speed`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.454  -5.644   0.365   6.246  37.341 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.832438   0.797146  -2.299   0.0216 *  
## `percent high speed`  0.328581   0.009905  33.173   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.653 on 4741 degrees of freedom
## Multiple R-squared:  0.1884, Adjusted R-squared:  0.1882 
## F-statistic:  1100 on 1 and 4741 DF,  p-value: < 2.2e-16
internet_model_frac <- lm(frac_increase ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `percent high speed`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3723 -0.4295 -0.0927  0.3348  3.7444 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.5276772  0.0559890  -9.425   <2e-16 ***
## `percent high speed`  0.0221528  0.0006957  31.843   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.678 on 4741 degrees of freedom
## Multiple R-squared:  0.1762, Adjusted R-squared:  0.176 
## F-statistic:  1014 on 1 and 4741 DF,  p-value: < 2.2e-16

Race

# white
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% white`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are white",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and White Residents"
  )

white_model_dif <- lm(`% increase in staying completely home` ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% white`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.224  -6.849  -0.068   7.103  32.020 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.449922   0.381489  66.712  < 2e-16 ***
## `% white`   -0.023245   0.006476  -3.589 0.000335 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.7 on 4741 degrees of freedom
## Multiple R-squared:  0.00271,    Adjusted R-squared:  0.0025 
## F-statistic: 12.88 on 1 and 4741 DF,  p-value: 0.000335
white_model_frac <- lm(frac_increase ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% white`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0940 -0.5074 -0.1190  0.3926  4.0869 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0566327  0.0264927  39.884  < 2e-16 ***
## `% white`   0.0031738  0.0004498   7.057 1.95e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7431 on 4741 degrees of freedom
## Multiple R-squared:  0.01039,    Adjusted R-squared:  0.01019 
## F-statistic:  49.8 on 1 and 4741 DF,  p-value: 1.953e-12
# asian
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% Asian`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are Asian",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Asian Residents"
  )

asian_model_dif <- lm(`% increase in staying completely home` ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% Asian`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.403  -5.930   0.005   6.516  30.859 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.338483   0.215982   89.54   <2e-16 ***
## `% Asian`    0.205779   0.006865   29.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.825 on 4741 degrees of freedom
## Multiple R-squared:  0.1593, Adjusted R-squared:  0.1591 
## F-statistic: 898.5 on 1 and 4741 DF,  p-value: < 2.2e-16
asian_model_frac <- lm(frac_increase ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% Asian`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2912 -0.4843 -0.1484  0.3709  4.1052 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0084129  0.0158665   63.56   <2e-16 ***
## `% Asian`   0.0092693  0.0005043   18.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7218 on 4741 degrees of freedom
## Multiple R-squared:  0.06652,    Adjusted R-squared:  0.06632 
## F-statistic: 337.8 on 1 and 4741 DF,  p-value: < 2.2e-16
# hispanic/latino
bay_dem_distancing_pre_post %>% 
  ggplot(aes(
  x = `% non hispanic/latino`,
  y = `% increase in staying completely home`
)) + geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "Percent of residents that are not Hispanic or Latino",
    y = "Dif in % completely at home after shelter-in-place relative to before",
    title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
  )

hisp_model_dif <- lm(`% increase in staying completely home` ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_dif)
## 
## Call:
## lm(formula = `% increase in staying completely home` ~ `% non hispanic/latino`, 
##     data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.040  -5.747   0.638   6.641  29.054 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             8.895479   0.587172   15.15   <2e-16 ***
## `% non hispanic/latino` 0.197388   0.007339   26.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.981 on 4741 degrees of freedom
## Multiple R-squared:  0.1324, Adjusted R-squared:  0.1322 
## F-statistic: 723.4 on 1 and 4741 DF,  p-value: < 2.2e-16
hisp_model_frac <- lm(frac_increase ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_frac)
## 
## Call:
## lm(formula = frac_increase ~ `% non hispanic/latino`, data = bay_dem_distancing_pre_post)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1580 -0.4356 -0.0529  0.3604  3.7867 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.0842276  0.0404697   2.081   0.0375 *  
## `% non hispanic/latino` 0.0147441  0.0005058  29.149   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6879 on 4741 degrees of freedom
## Multiple R-squared:  0.152,  Adjusted R-squared:  0.1518 
## F-statistic: 849.7 on 1 and 4741 DF,  p-value: < 2.2e-16

Multiple regression analysis: income and Spanish language ability

difs_model_inc_span <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(difs_model_inc_span)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.465  -4.727   0.688   5.830  27.382 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                          10.236332   0.637468
## bay_dem_distancing_pre_post$`% over 125,000`          0.259645   0.007530
## bay_dem_distancing_pre_post$`% not speaking spanish`  0.045309   0.008722
##                                                      t value Pr(>|t|)    
## (Intercept)                                           16.058  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`          34.483  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`   5.195 2.14e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.995 on 4740 degrees of freedom
## Multiple R-squared:  0.2955, Adjusted R-squared:  0.2952 
## F-statistic: 994.2 on 2 and 4740 DF,  p-value: < 2.2e-16
frac_model_inc_span <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(frac_model_inc_span)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4714 -0.3522 -0.0253  0.3181  3.6414 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                          0.1903893  0.0431266
## bay_dem_distancing_pre_post$`% over 125,000`         0.0193338  0.0005094
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0033411  0.0005901
##                                                      t value Pr(>|t|)    
## (Intercept)                                            4.415 1.03e-05 ***
## bay_dem_distancing_pre_post$`% over 125,000`          37.954  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`   5.662 1.58e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6085 on 4740 degrees of freedom
## Multiple R-squared:  0.3366, Adjusted R-squared:  0.3363 
## F-statistic:  1202 on 2 and 4740 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and Spanish language ability

difs_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_span_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.827  -4.512   0.784   5.808  26.527 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                10.73268    0.64468
## bay_dem_distancing_pre_post$`% over 125,000`                0.23491    0.00916
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.01974    0.01025
## bay_dem_distancing_pre_post$`percent associates or higher`  0.05193    0.01100
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 16.648  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                25.645  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         1.926   0.0542 .  
## bay_dem_distancing_pre_post$`percent associates or higher`   4.720 2.43e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.975 on 4739 degrees of freedom
## Multiple R-squared:  0.2988, Adjusted R-squared:  0.2984 
## F-statistic: 673.2 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_span_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4975 -0.3488 -0.0186  0.3136  3.5814 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2317750  0.0435610
## bay_dem_distancing_pre_post$`% over 125,000`               0.0172713  0.0006190
## bay_dem_distancing_pre_post$`% not speaking spanish`       0.0012094  0.0006926
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0043301  0.0007434
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  5.321 1.08e-07 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.904  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         1.746   0.0809 .  
## bay_dem_distancing_pre_post$`percent associates or higher`   5.824 6.11e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6064 on 4739 degrees of freedom
## Multiple R-squared:  0.3413, Adjusted R-squared:  0.3409 
## F-statistic: 818.5 on 3 and 4739 DF,  p-value: < 2.2e-16

The effect of Spanish language speaking vanishes when accounting for both education and income.

Multiple regression analysis: income, English language ability and education

difs_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_eng_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -70.522  -4.328   0.873   5.687  25.848 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                24.959746   1.383953
## bay_dem_distancing_pre_post$`% over 125,000`                0.244092   0.009082
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.165913   0.016854
## bay_dem_distancing_pre_post$`percent associates or higher`  0.098844   0.009937
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 18.035   <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                26.875   <2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`     -9.844   <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   9.948   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.888 on 4739 degrees of freedom
## Multiple R-squared:  0.3123, Adjusted R-squared:  0.3119 
## F-statistic: 717.5 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_eng_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% speaking english > well` + 
##     bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5450 -0.3448 -0.0135  0.3161  3.5823 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.7462848
## bay_dem_distancing_pre_post$`% over 125,000`                0.0176203
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0056714
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0062369
##                                                            Std. Error t value
## (Intercept)                                                 0.0942164   7.921
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006183  28.498
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0011474  -4.943
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006765   9.220
##                                                            Pr(>|t|)    
## (Intercept)                                                2.91e-15 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`    7.96e-07 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6051 on 4739 degrees of freedom
## Multiple R-squared:  0.3443, Adjusted R-squared:  0.3438 
## F-statistic: 829.3 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, English language ability, education, Spanish language ability, and vehicle ownership

difs_model_lots <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_lots)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`% not speaking spanish` + 
##         bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.494  -4.310   0.828   5.515  25.908 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                18.798366   1.453769
## bay_dem_distancing_pre_post$`% over 125,000`                0.206123   0.009496
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.272710   0.018780
## bay_dem_distancing_pre_post$`percent associates or higher`  0.099056   0.011133
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.075914   0.010674
## bay_dem_distancing_pre_post$`percent with vehicles`         0.121714   0.010856
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 12.931  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                21.705  < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`    -14.521  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   8.898  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`         7.112 1.32e-12 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         11.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.747 on 4737 degrees of freedom
## Multiple R-squared:  0.3342, Adjusted R-squared:  0.3335 
## F-statistic: 475.6 on 5 and 4737 DF,  p-value: < 2.2e-16
frac_model_lots <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +  bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_lots)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% speaking english > well` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6950 -0.3449 -0.0203  0.3086  3.5203 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.3163669
## bay_dem_distancing_pre_post$`% over 125,000`                0.0149339
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0125433
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0069854
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.0038793
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0088204
##                                                            Std. Error t value
## (Intercept)                                                 0.0989750   3.196
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006465  23.099
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0012786  -9.810
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0007579   9.217
## bay_dem_distancing_pre_post$`% not speaking spanish`        0.0007267   5.338
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0007391  11.934
##                                                            Pr(>|t|)    
## (Intercept)                                                  0.0014 ** 
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well`     < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish`       9.83e-08 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5955 on 4737 degrees of freedom
## Multiple R-squared:  0.3651, Adjusted R-squared:  0.3644 
## F-statistic: 544.7 on 5 and 4737 DF,  p-value: < 2.2e-16

Multiple regression analysis: Hispanic/Latino, income, and education

difs_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_hisp_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% non hispanic/latino` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.843  -4.546   0.752   5.793  26.633 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                10.560060   0.532467
## bay_dem_distancing_pre_post$`% over 125,000`                0.234645   0.009145
## bay_dem_distancing_pre_post$`% non hispanic/latino`         0.029385   0.009691
## bay_dem_distancing_pre_post$`percent associates or higher`  0.043205   0.011416
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 19.832  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                25.659  < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino`          3.032 0.002442 ** 
## bay_dem_distancing_pre_post$`percent associates or higher`   3.785 0.000156 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.969 on 4739 degrees of freedom
## Multiple R-squared:  0.2996, Adjusted R-squared:  0.2992 
## F-statistic: 675.8 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_hisp_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4802 -0.3490 -0.0163  0.3181  3.5845 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2097901  0.0359732
## bay_dem_distancing_pre_post$`% over 125,000`               0.0172406  0.0006178
## bay_dem_distancing_pre_post$`% non hispanic/latino`        0.0020764  0.0006547
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0036082  0.0007713
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  5.832 5.85e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.906  < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino`          3.171  0.00153 ** 
## bay_dem_distancing_pre_post$`percent associates or higher`   4.678 2.97e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.606 on 4739 degrees of freedom
## Multiple R-squared:  0.3423, Adjusted R-squared:  0.3418 
## F-statistic:   822 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and white residents

difs_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_white_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% white` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.428  -4.350   0.881   5.579  25.210 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                15.306684   0.390359
## bay_dem_distancing_pre_post$`% over 125,000`                0.242626   0.008837
## bay_dem_distancing_pre_post$`% white`                      -0.101650   0.005537
## bay_dem_distancing_pre_post$`percent associates or higher`  0.096559   0.009211
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  39.21   <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                 27.45   <2e-16 ***
## bay_dem_distancing_pre_post$`% white`                       -18.36   <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   10.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.675 on 4739 degrees of freedom
## Multiple R-squared:  0.3449, Adjusted R-squared:  0.3445 
## F-statistic: 831.5 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_white_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5704 -0.3489 -0.0235  0.3175  3.5137 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.3702557
## bay_dem_distancing_pre_post$`% over 125,000`                0.0174865
## bay_dem_distancing_pre_post$`% white`                      -0.0021496
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0057229
##                                                            Std. Error t value
## (Intercept)                                                 0.0272078  13.608
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006160  28.389
## bay_dem_distancing_pre_post$`% white`                       0.0003859  -5.570
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006420   8.914
##                                                            Pr(>|t|)    
## (Intercept)                                                 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% white`                      2.69e-08 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6046 on 4739 degrees of freedom
## Multiple R-squared:  0.3452, Adjusted R-squared:  0.3447 
## F-statistic: 832.6 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, and Asian residents

difs_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_asian_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.615  -4.177   0.768   5.367  25.547 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                9.940784   0.339685
## bay_dem_distancing_pre_post$`% over 125,000`               0.213721   0.008663
## bay_dem_distancing_pre_post$`% Asian`                      0.148516   0.006084
## bay_dem_distancing_pre_post$`percent associates or higher` 0.047341   0.008831
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 29.265  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                24.672  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       24.412  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   5.361 8.68e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.462 on 4739 degrees of freedom
## Multiple R-squared:  0.3767, Adjusted R-squared:  0.3763 
## F-statistic: 954.5 on 3 and 4739 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_asian_educ)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5370 -0.3447 -0.0230  0.3083  3.4180 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.2383122  0.0240592
## bay_dem_distancing_pre_post$`% over 125,000`               0.0166486  0.0006135
## bay_dem_distancing_pre_post$`% Asian`                      0.0046374  0.0004309
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0045230  0.0006255
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  9.905  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                27.135  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       10.762  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   7.231 5.55e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5993 on 4739 degrees of freedom
## Multiple R-squared:  0.3566, Adjusted R-squared:  0.3562 
## F-statistic: 875.5 on 3 and 4739 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and English language ability

difs_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(difs_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.282  -4.168   0.773   5.359  25.554 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                8.930573   1.503757
## bay_dem_distancing_pre_post$`% over 125,000`               0.212813   0.008762
## bay_dem_distancing_pre_post$`% Asian`                      0.150623   0.006808
## bay_dem_distancing_pre_post$`percent associates or higher` 0.044452   0.009775
## bay_dem_distancing_pre_post$`% speaking english > well`    0.012384   0.017957
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  5.939 3.08e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                24.287  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       22.124  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   4.547 5.57e-06 ***
## bay_dem_distancing_pre_post$`% speaking english > well`      0.690     0.49    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.462 on 4738 degrees of freedom
## Multiple R-squared:  0.3767, Adjusted R-squared:  0.3762 
## F-statistic: 715.9 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(frac_model_inc_asian_educ_eng)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`% speaking english > well`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5378 -0.3443 -0.0222  0.3083  3.4195 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                 0.2568982
## bay_dem_distancing_pre_post$`% over 125,000`                0.0166653
## bay_dem_distancing_pre_post$`% Asian`                       0.0045987
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0045762
## bay_dem_distancing_pre_post$`% speaking english > well`    -0.0002278
##                                                            Std. Error t value
## (Intercept)                                                 0.1065129   2.412
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006207  26.851
## bay_dem_distancing_pre_post$`% Asian`                       0.0004822   9.536
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006924   6.609
## bay_dem_distancing_pre_post$`% speaking english > well`     0.0012719  -0.179
##                                                            Pr(>|t|)    
## (Intercept)                                                  0.0159 *  
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 4.29e-11 ***
## bay_dem_distancing_pre_post$`% speaking english > well`      0.8578    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5994 on 4738 degrees of freedom
## Multiple R-squared:  0.3566, Adjusted R-squared:  0.3561 
## F-statistic: 656.5 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and high speed internet access

difs_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.969  -4.196   0.701   5.317  24.681 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                4.797571   0.733885
## bay_dem_distancing_pre_post$`% over 125,000`               0.189501   0.009138
## bay_dem_distancing_pre_post$`% Asian`                      0.144325   0.006068
## bay_dem_distancing_pre_post$`percent associates or higher` 0.029843   0.009050
## bay_dem_distancing_pre_post$`percent high speed`           0.089262   0.011310
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  6.537 6.93e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000`                20.738  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       23.785  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   3.297 0.000983 ***
## bay_dem_distancing_pre_post$`percent high speed`             7.892 3.66e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.408 on 4738 degrees of freedom
## Multiple R-squared:  0.3847, Adjusted R-squared:  0.3842 
## F-statistic: 740.7 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4532 -0.3451 -0.0265  0.3010  3.4069 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                0.0284793  0.0522074
## bay_dem_distancing_pre_post$`% over 125,000`               0.0156605  0.0006500
## bay_dem_distancing_pre_post$`% Asian`                      0.0044665  0.0004317
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0038091  0.0006438
## bay_dem_distancing_pre_post$`percent high speed`           0.0036417  0.0008046
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  0.546    0.585    
## bay_dem_distancing_pre_post$`% over 125,000`                24.091  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       10.347  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   5.916 3.52e-09 ***
## bay_dem_distancing_pre_post$`percent high speed`             4.526 6.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5981 on 4738 degrees of freedom
## Multiple R-squared:  0.3594, Adjusted R-squared:  0.3588 
## F-statistic: 664.4 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, and vehicle ownership

difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.033  -4.266   0.743   5.414  26.471 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                2.113709   0.924419
## bay_dem_distancing_pre_post$`% over 125,000`               0.185154   0.009146
## bay_dem_distancing_pre_post$`% Asian`                      0.153931   0.006061
## bay_dem_distancing_pre_post$`percent associates or higher` 0.061546   0.008894
## bay_dem_distancing_pre_post$`percent with vehicles`        0.088719   0.009758
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  2.287   0.0223 *  
## bay_dem_distancing_pre_post$`% over 125,000`                20.245  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       25.396  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   6.920 5.12e-12 ***
## bay_dem_distancing_pre_post$`percent with vehicles`          9.092  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.39 on 4738 degrees of freedom
## Multiple R-squared:  0.3874, Adjusted R-squared:  0.3868 
## F-statistic: 748.9 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent with vehicles`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5103 -0.3449 -0.0296  0.3028  3.3752 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.3768715
## bay_dem_distancing_pre_post$`% over 125,000`                0.0144033
## bay_dem_distancing_pre_post$`% Asian`                       0.0050630
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0056395
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0069731
##                                                            Std. Error t value
## (Intercept)                                                 0.0653423  -5.768
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006464  22.281
## bay_dem_distancing_pre_post$`% Asian`                       0.0004284  11.817
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006287   8.970
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0006897  10.110
##                                                            Pr(>|t|)    
## (Intercept)                                                8.55e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.593 on 4738 degrees of freedom
## Multiple R-squared:  0.3702, Adjusted R-squared:  0.3696 
## F-statistic: 696.2 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian residents, high speed internet access, and vehicle ownership

difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent with vehicles` + 
##         bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.522  -4.177   0.704   5.339  27.981 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                0.518614   0.982306
## bay_dem_distancing_pre_post$`% over 125,000`               0.175803   0.009338
## bay_dem_distancing_pre_post$`% Asian`                      0.150015   0.006104
## bay_dem_distancing_pre_post$`percent associates or higher` 0.047059   0.009390
## bay_dem_distancing_pre_post$`percent with vehicles`        0.069034   0.010592
## bay_dem_distancing_pre_post$`percent high speed`           0.057824   0.012250
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  0.528    0.598    
## bay_dem_distancing_pre_post$`% over 125,000`                18.827  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       24.575  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   5.012 5.59e-07 ***
## bay_dem_distancing_pre_post$`percent with vehicles`          6.518 7.88e-11 ***
## bay_dem_distancing_pre_post$`percent high speed`             4.720 2.42e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.371 on 4737 degrees of freedom
## Multiple R-squared:  0.3902, Adjusted R-squared:  0.3896 
## F-statistic: 606.3 on 5 and 4737 DF,  p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +  bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_vehicle)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4984 -0.3455 -0.0311  0.3024  3.3757 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.3920916
## bay_dem_distancing_pre_post$`% over 125,000`                0.0143141
## bay_dem_distancing_pre_post$`% Asian`                       0.0050257
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0055012
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0067852
## bay_dem_distancing_pre_post$`percent high speed`            0.0005517
##                                                            Std. Error t value
## (Intercept)                                                 0.0695942  -5.634
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006616  21.637
## bay_dem_distancing_pre_post$`% Asian`                       0.0004325  11.621
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006653   8.269
## bay_dem_distancing_pre_post$`percent with vehicles`         0.0007504   9.042
## bay_dem_distancing_pre_post$`percent high speed`            0.0008679   0.636
##                                                            Pr(>|t|)    
## (Intercept)                                                1.86e-08 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent with vehicles`         < 2e-16 ***
## bay_dem_distancing_pre_post$`percent high speed`              0.525    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5931 on 4737 degrees of freedom
## Multiple R-squared:  0.3702, Adjusted R-squared:  0.3696 
## F-statistic:   557 on 5 and 4737 DF,  p-value: < 2.2e-16

This model seems to capture the most variation so far, though it is only an improvement of about 1% of the variation predicted than the previous one with solely income, education, and Asian residents.

Multiple regression analysis: income, education, Asian population, child population

difs_model_inc_educ_child_asian <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` )
summary(difs_model_inc_educ_child_asian)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent less than 18` + 
##         bay_dem_distancing_pre_post$`% Asian`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.943  -4.160   0.718   5.271  27.824 
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                3.456668   0.494425
## bay_dem_distancing_pre_post$`% over 125,000`               0.176887   0.008652
## bay_dem_distancing_pre_post$`percent associates or higher` 0.093489   0.008951
## bay_dem_distancing_pre_post$`percent less than 18`         0.272741   0.015518
## bay_dem_distancing_pre_post$`% Asian`                      0.154024   0.005904
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  6.991  3.1e-12 ***
## bay_dem_distancing_pre_post$`% over 125,000`                20.445  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  10.444  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          17.575  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       26.090  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.2 on 4738 degrees of freedom
## Multiple R-squared:  0.4148, Adjusted R-squared:  0.4143 
## F-statistic: 839.6 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_educ_child_asian <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` )
summary(frac_model_inc_educ_child_asian)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4472 -0.3335 -0.0322  0.2954  3.3258 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.2377859
## bay_dem_distancing_pre_post$`% over 125,000`                0.0139441
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0079114
## bay_dem_distancing_pre_post$`percent less than 18`          0.0200261
## bay_dem_distancing_pre_post$`% Asian`                       0.0050419
##                                                            Std. Error t value
## (Intercept)                                                 0.0349337  -6.807
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006113  22.811
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006324  12.509
## bay_dem_distancing_pre_post$`percent less than 18`          0.0010965  18.264
## bay_dem_distancing_pre_post$`% Asian`                       0.0004171  12.087
##                                                            Pr(>|t|)    
## (Intercept)                                                1.12e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5793 on 4738 degrees of freedom
## Multiple R-squared:  0.3989, Adjusted R-squared:  0.3984 
## F-statistic: 786.1 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian population, 20-29 population

difs_model_inc_educ_asian_yad <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian` )
summary(difs_model_inc_educ_asian_yad)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.744  -4.201   0.790   5.460  28.057 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                12.956681   0.418765
## bay_dem_distancing_pre_post$`% over 125,000`                0.179825   0.008992
## bay_dem_distancing_pre_post$`percent associates or higher`  0.059702   0.008762
## bay_dem_distancing_pre_post$`percent 20-29`                -0.186866   0.015595
## bay_dem_distancing_pre_post$`% Asian`                       0.156425   0.006030
##                                                            t value Pr(>|t|)    
## (Intercept)                                                 30.940  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                19.999  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   6.814 1.07e-11 ***
## bay_dem_distancing_pre_post$`percent 20-29`                -11.983  < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       25.939  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.337 on 4738 degrees of freedom
## Multiple R-squared:  0.395,  Adjusted R-squared:  0.3945 
## F-statistic: 773.3 on 4 and 4738 DF,  p-value: < 2.2e-16
frac_model_inc_educ_asian_yad <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` )
summary(frac_model_inc_educ_asian_yad)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4472 -0.3335 -0.0322  0.2954  3.3258 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.2377859
## bay_dem_distancing_pre_post$`% over 125,000`                0.0139441
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0079114
## bay_dem_distancing_pre_post$`percent less than 18`          0.0200261
## bay_dem_distancing_pre_post$`% Asian`                       0.0050419
##                                                            Std. Error t value
## (Intercept)                                                 0.0349337  -6.807
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006113  22.811
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006324  12.509
## bay_dem_distancing_pre_post$`percent less than 18`          0.0010965  18.264
## bay_dem_distancing_pre_post$`% Asian`                       0.0004171  12.087
##                                                            Pr(>|t|)    
## (Intercept)                                                1.12e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5793 on 4738 degrees of freedom
## Multiple R-squared:  0.3989, Adjusted R-squared:  0.3984 
## F-statistic: 786.1 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian population, 20-29 population, child population

difs_model_inc_educ_asian_yad_child <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent less than 18`)
summary(difs_model_inc_educ_asian_yad_child)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.630  -4.173   0.670   5.289  27.785 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                 6.193338   0.615209
## bay_dem_distancing_pre_post$`% over 125,000`                0.160201   0.008894
## bay_dem_distancing_pre_post$`percent associates or higher`  0.095378   0.008904
## bay_dem_distancing_pre_post$`percent 20-29`                -0.118034   0.015951
## bay_dem_distancing_pre_post$`% Asian`                       0.158313   0.005899
## bay_dem_distancing_pre_post$`percent less than 18`          0.237759   0.016139
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  10.07  < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000`                 18.01  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   10.71  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent 20-29`                  -7.40  1.6e-13 ***
## bay_dem_distancing_pre_post$`% Asian`                        26.84  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`           14.73  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.154 on 4737 degrees of freedom
## Multiple R-squared:  0.4215, Adjusted R-squared:  0.4209 
## F-statistic: 690.3 on 5 and 4737 DF,  p-value: < 2.2e-16
frac_model_inc_educ_asian_yad_child <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent less than 18`)
summary(frac_model_inc_educ_asian_yad_child)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` + 
##     bay_dem_distancing_pre_post$`percent less than 18`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4472 -0.3335 -0.0322  0.2954  3.3258 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.2377859
## bay_dem_distancing_pre_post$`% over 125,000`                0.0139441
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0079114
## bay_dem_distancing_pre_post$`percent less than 18`          0.0200261
## bay_dem_distancing_pre_post$`% Asian`                       0.0050419
##                                                            Std. Error t value
## (Intercept)                                                 0.0349337  -6.807
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006113  22.811
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006324  12.509
## bay_dem_distancing_pre_post$`percent less than 18`          0.0010965  18.264
## bay_dem_distancing_pre_post$`% Asian`                       0.0004171  12.087
##                                                            Pr(>|t|)    
## (Intercept)                                                1.12e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5793 on 4738 degrees of freedom
## Multiple R-squared:  0.3989, Adjusted R-squared:  0.3984 
## F-statistic: 786.1 on 4 and 4738 DF,  p-value: < 2.2e-16

Multiple regression analysis: income, education, Asian population, 20-29 population, child population, and high speed internet access

difs_model_inc_educ_asian_yad_child_internet <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian`  + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_educ_asian_yad_child_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~ 
##     bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`percent associates or higher` + 
##         bay_dem_distancing_pre_post$`percent less than 18` + 
##         bay_dem_distancing_pre_post$`percent 20-29` + bay_dem_distancing_pre_post$`% Asian` + 
##         bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.638  -4.153   0.688   5.249  27.366 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                 2.354277   0.834499
## bay_dem_distancing_pre_post$`% over 125,000`                0.140469   0.009319
## bay_dem_distancing_pre_post$`percent associates or higher`  0.078889   0.009191
## bay_dem_distancing_pre_post$`percent less than 18`          0.223710   0.016197
## bay_dem_distancing_pre_post$`percent 20-29`                -0.125720   0.015916
## bay_dem_distancing_pre_post$`% Asian`                       0.154854   0.005893
## bay_dem_distancing_pre_post$`percent high speed`            0.074577   0.011014
##                                                            t value Pr(>|t|)    
## (Intercept)                                                  2.821   0.0048 ** 
## bay_dem_distancing_pre_post$`% over 125,000`                15.073  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`   8.583  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          13.812  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent 20-29`                 -7.899 3.47e-15 ***
## bay_dem_distancing_pre_post$`% Asian`                       26.276  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent high speed`             6.771 1.43e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.115 on 4736 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.4263 
## F-statistic: 588.3 on 6 and 4736 DF,  p-value: < 2.2e-16
frac_model_inc_educ_asian_yad_child_internet <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000`  + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_educ_asian_yad_child_internet)
## 
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + 
##     bay_dem_distancing_pre_post$`percent associates or higher` + 
##     bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`% Asian` + 
##     bay_dem_distancing_pre_post$`percent less than 18` + bay_dem_distancing_pre_post$`percent high speed`)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4740 -0.3336 -0.0345  0.2949  3.3307 
## 
## Coefficients:
##                                                              Estimate
## (Intercept)                                                -0.3493357
## bay_dem_distancing_pre_post$`% over 125,000`                0.0134263
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0074502
## bay_dem_distancing_pre_post$`percent less than 18`          0.0196992
## bay_dem_distancing_pre_post$`% Asian`                       0.0049381
## bay_dem_distancing_pre_post$`percent high speed`            0.0020708
##                                                            Std. Error t value
## (Intercept)                                                 0.0547851  -6.376
## bay_dem_distancing_pre_post$`% over 125,000`                0.0006416  20.927
## bay_dem_distancing_pre_post$`percent associates or higher`  0.0006557  11.362
## bay_dem_distancing_pre_post$`percent less than 18`          0.0011027  17.864
## bay_dem_distancing_pre_post$`% Asian`                       0.0004187  11.794
## bay_dem_distancing_pre_post$`percent high speed`            0.0007838   2.642
##                                                            Pr(>|t|)    
## (Intercept)                                                1.98e-10 ***
## bay_dem_distancing_pre_post$`% over 125,000`                < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher`  < 2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 18`          < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian`                       < 2e-16 ***
## bay_dem_distancing_pre_post$`percent high speed`            0.00827 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.579 on 4737 degrees of freedom
## Multiple R-squared:  0.3998, Adjusted R-squared:  0.3992 
## F-statistic: 631.1 on 5 and 4737 DF,  p-value: < 2.2e-16

This model seems to be the best. Including the age variables led to an increase in about 3% of the variability predicted.